pos = replicate(1000, sum(runif(16,-1,1)))
hist(pos)
big = replicate(1e4, prod(1 + runif(12, 0, 0.5)))
small = replicate(1e4, prod(1 + runif(12, 0, 0.01)))
dens(small, norm.comp=TRUE)
dens(big, norm.comp=TRUE)
logbig = replicate(1e4, log(prod(1 + runif(12, 0, 0.5))))
dens(logbig, norm.comp=TRUE)
data(Howell1)
df = Howell1
head(df)
## height weight age male
## 1 151.765 47.82561 63 1
## 2 139.700 36.48581 63 0
## 3 136.525 31.86484 65 0
## 4 156.845 53.04191 41 1
## 5 145.415 41.27687 51 0
## 6 163.830 62.99259 35 1
precis(df)
## mean sd 5.5% 94.5% histogram
## height 138.2635963 27.6024476 81.108550 165.73500 ▁▁▁▁▁▁▁▂▁▇▇▅▁
## weight 35.6106176 14.7191782 9.360721 54.50289 ▁▂▃▂▂▂▂▅▇▇▃▂▁
## age 29.3443934 20.7468882 1.000000 66.13500 ▇▅▅▃▅▂▂▁▁
## male 0.4724265 0.4996986 0.000000 1.00000 ▇▁▁▁▁▁▁▁▁▇
df2 = df[df$age >= 18,]
dens(df2$height)
### p. 85
# Prior for height mean
curve(dnorm(x, 178, 20), from=100, to=250)
# Prior for height std
curve(dunif(x, 0, 50), from=-10, to=60)
sample_mu = rnorm(1e4, 178, 20)
sample_sigma = runif(1e4, 0, 50)
prior_h = rnorm(1e4, sample_mu, sample_sigma)
dens(prior_h)
sample_mu = rnorm(1e4, 178, 100)
prior_h = rnorm(1e4, sample_mu, sample_sigma)
dens(prior_h)
mu.list = seq(from=150, to=160, length.out=100)
sigma.list = seq(from=7, to=9, length.out=100)
post = expand.grid(mu=mu.list, sigma=sigma.list)
post$LL = sapply(1:nrow(post), function(i) sum(dnorm(df2$height, post$mu[i], post$sigma[i], log=TRUE)))
post$prod = post$LL + dnorm(post$mu, 178, 20, log=TRUE)
post$prob = exp(post$prod - max(post$prod))
image_xyz(post$mu, post$sigma, post$prob)
sample.rows = sample(1:nrow(post), size=1e4, replace=TRUE, prob=post$prob)
sample.mu = post$mu[sample.rows]
sample.sigma = post$sigma[sample.rows]
plot(sample.mu, sample.sigma, cex=0.5, pch=16, col=col.alpha(rangi2, 0.1))
dens(sample.mu)
dens(sample.sigma)
PI(sample.mu)
## 5% 94%
## 153.9394 155.2525
PI(sample.sigma)
## 5% 94%
## 7.303030 8.252525
df3 = sample(df2$height, size=20)
mu.list = seq(from=150, to=170, length.out=200)
sigma.list = seq(from=4, to=20, length.out=200)
post2 = expand.grid(mu=mu.list, sigma=sigma.list)
post2$LL = sapply( 1:nrow(post2), function(i) sum(dnorm(df3, mean=post2$mu[i], sd=post2$sigma[i], log=TRUE)))
post2$prod = post2$LL + dnorm(post2$mu, 178, 20, TRUE) + dunif(post2$sigma, 0, 50, TRUE)
post2$prob = exp(post2$prod - max(post2$prod))
sample2.rows = sample(1:nrow(post2), size=1e4, replace=TRUE, prob=post2$prob)
sample2.mu = post2$mu[sample2.rows]
sample2.sigma = post2$sigma[sample2.rows]
plot(sample2.mu, sample2.sigma, cex=0.5, col=col.alpha(rangi2,0.1), xlab="mu", ylab="sigma", pch=16)
dens(sample2.sigma, norm.comp=TRUE)
flist = alist(
height ~ dnorm(mu, sigma),
mu ~ dnorm(178, 20),
sigma ~ dunif(0, 50)
)
m4.1 = quap(flist, data=df2)
precis(m4.1)
## mean sd 5.5% 94.5%
## mu 154.607234 0.4119549 153.948850 155.265618
## sigma 7.730586 0.2913157 7.265008 8.196165
flist = alist(
height ~ dnorm(mu, sigma),
mu ~ dnorm(178, 0.1),
sigma ~ dunif(0, 50)
)
m4.2 = quap(flist, data=df2)
precis(m4.2)
## mean sd 5.5% 94.5%
## mu 177.86375 0.1002354 177.70356 178.02395
## sigma 24.51757 0.9289237 23.03297 26.00217
mcov = vcov(m4.1)
diag(mcov)
## mu sigma
## 0.16970686 0.08486483
cov2cor(mcov)
## mu sigma
## mu 1.000000000 0.001854566
## sigma 0.001854566 1.000000000
post = extract.samples(m4.1, n=1e4)
head(post)
## mu sigma
## 1 154.1115 8.361579
## 2 154.5655 7.765194
## 3 154.4405 7.925284
## 4 154.2046 7.973574
## 5 154.4444 7.622452
## 6 154.3674 7.866277
precis(post)
## mean sd 5.5% 94.5% histogram
## mu 154.606368 0.4112522 153.953827 155.256155 ▁▁▁▅▇▂▁▁
## sigma 7.729598 0.2943963 7.265501 8.203861 ▁▁▁▂▅▇▇▃▁▁▁
plot(post)
plot(df2$height ~ df2$weight)
set.seed(2971)
N = 100
a = rnorm(N, 178, 20)
b = rnorm(N, 0, 10)
plot_lines = function() {
plot(NULL, xlim=range(df2$weight), ylim=c(-100,400), xlab="weight", ylab="height")
abline(h=0, lty=2)
abline(h=272, lty=1, lwd=0.5)
mtext("b ~ dnorm(0,10)")
xbar = mean(df2$weight)
for(i in 1:N) curve(a[i] + b[i]*(x - xbar), from=min(df2$weight) , to=max(df2$weight) , add=TRUE , col=col.alpha("black",0.2))
}
plot_lines()
b = rlnorm(1e4, 0, 1)
dens(b, xlim=c(0,5), adj=0.1)
set.seed(2971)
N = 100
a = rnorm(N, 178, 20)
b = rlnorm(N, 0, 1)
plot_lines()
xbar = mean(df2$weight)
flist = alist(
height ~ dnorm(mu, sigma),
mu <- a + b * (weight - xbar),
a ~ dnorm(178, 20),
b ~ dlnorm(0, 1),
sigma ~ dunif(0, 50)
)
m4.3 = quap(flist, data=df2)
precis(m4.3)
## mean sd 5.5% 94.5%
## a 154.6013671 0.27030766 154.1693633 155.0333710
## b 0.9032807 0.04192363 0.8362787 0.9702828
## sigma 5.0718809 0.19115478 4.7663786 5.3773831
round(vcov(m4.3), 3)
## a b sigma
## a 0.073 0.000 0.000
## b 0.000 0.002 0.000
## sigma 0.000 0.000 0.037
plot(height ~ weight, data=df2, col=rangi2)
post = extract.samples(m4.3)
a_map = mean(post$a)
b_map = mean(post$b)
curve(a_map + b_map * (x - xbar), add=TRUE)
plot_n_curves = function(N) {
dfN = df2[1:N,]
mN = quap(flist, data=dfN)
post = extract.samples(mN, n=N)
# display raw data and sample size
plot(dfN$weight, dfN$height,
xlim=range(df2$weight), ylim=range(df2$height),
col=rangi2, xlab="weight", ylab="height")
mtext(concat("N = ", N))
# plot the lines, with transparency
xbar = mean(dfN$weight)
for (i in 1:N) {
curve(post$a[i] + post$b[i] * (x - xbar), col=col.alpha("black", 0.3), add=TRUE)
}
}
plot_n_curves(20)
plot_n_curves(30)
plot_n_curves(40)
plot_n_curves(50)
post = extract.samples(m4.3)
mu_at_50 = post$a + post$b * (50 - xbar)
dens(mu_at_50, col=rangi2, lwd=2, xlab="mu|weight=50")
weight.seq = seq(from=25, to=70, by=1)
mu = link(m4.3, data=data.frame(weight=weight.seq)) # Book p. 110
plot(height ~ weight, df2, type="n")
for (i in 1:N) {
points(weight.seq, mu[i,], pch=16, col=col.alpha(rangi2, 0.1))
}
mu.mean = apply(mu, 2, mean)
mu.PI = apply(mu, 2, PI, prob=0.89)
plot(height ~ weight, data=df2, col=col.alpha(rangi2, 0.5))
lines(weight.seq, mu.mean)
shade(mu.PI, weight.seq)
sim.height = sim(m4.3, data=list(weight=weight.seq), n=1e4)
height.PI = apply(sim.height, 2, PI, prob=0.89)
plot(height ~ weight, df2, col=col.alpha(rangi2, 0.5))
lines(weight.seq, mu.mean)
shade(mu.PI, weight.seq)
shade(height.PI, weight.seq)
poly2 = alist(
height ~ dnorm(mu, sigma),
mu <- a + b1 * weight_s + b2 * weight_s2,
a ~ dnorm(178, 20),
b1 ~ dlnorm(0, 1),
b2 ~ dnorm(0, 1),
sigma ~ dunif(0, 50)
)
poly3 = alist(
height ~ dnorm(mu, sigma),
mu <- a + b1 * weight_s + b2 * weight_s2 + b3 * weight_s3,
a ~ dnorm(178, 20),
b1 ~ dlnorm(0, 1),
b2 ~ dnorm(0, 1),
b3 ~ dnorm(0, 1),
sigma ~ dunif(0, 50)
)
df$weight_s = (df$weight - mean(df$weight)) / sd(df$weight)
df$weight_s2 = df$weight_s ^ 2
df$weight_s3 = df$weight_s ^ 3
plot_model = function(poly, degree=2) {
model = quap(poly, data=df)
weight.seq = seq(from=-2.2, to=2, length.out=30)
if (degree == 2) pred_dat = list(weight_s=weight.seq, weight_s2=weight.seq^2)
else pred_dat = list(weight_s=weight.seq, weight_s2=weight.seq ^ 2, weight_s3=weight.seq^3)
mu = link(model, data=pred_dat)
mu.mean = apply(mu, 2, mean)
mu.PI = apply(mu, 2, PI, prob=0.89)
sim.height = sim(model, data=pred_dat)
height.PI = apply(sim.height, 2, PI , prob=0.89)
plot(height ~ weight_s, df, col=col.alpha(rangi2, 0.5))
lines(weight.seq, mu.mean)
shade(mu.PI, weight.seq)
shade(height.PI, weight.seq)
}
plot_model(poly2)
plot_model(poly3, degree=3)